
libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';


*This program contains the information needed to create Figure 4 and Table 8 of the paper;
*The program references:

1) daily_panel_rfs, which is constructed in the Daily Panel RFS (Intermediate File) code
2) dd_posts18_21q2, which contains due diligence posts from WSB. This data is collected from the Pushshift API;



* Load the daily panel into WORK.INFO and retain only the variables listed below;
data info;
    set mylib.daily_panel_rfs;

    keep
        ticker date cum_month gme_amc_flag post_gme
        ret1 ret5 ret21 week1-week13 abn_ret mom5 mom6_26
        net_dd_posts total_dd_posts non_research_posts
        net_dd2 net_dd2_pre net_dd2_post
        net_sa2 net_sa2_post
        non_research2 non_research2_post
        dd_and_sa dd_and_sa_post dd_and_non_research dd_and_non_research_post
        ln_size ln_bm bm_missing size_missing
        news_sentiment lag_sent5 lag_sent6_26 sentiment21
    ;
run;

* Build per-ticker lagged features:
  l1-l5_non_research = non_research_posts on each of the previous five rows of the same ticker
  past_posts         = sum of those five lags (missing until five prior rows exist)
  high_wsb_posts     = 1 when past_posts is greater than 1 and 0 otherwise
  lag_abs_ret_rank   = absolute value of the previous row abn_ret (ranked in a later step);
proc sort data=info;
    by ticker date;
run;

data info;
    set info;
    by ticker;

    * count is the row number within the current ticker. It must restart at
      every new ticker because the LAG queues are not reset by BY groups.
      Without the reset the first rows of each ticker would inherit values
      from the last rows of the previous ticker;
    if first.ticker then count = 0;
    count + 1;

    if non_research_posts = . then non_research_posts = 0;

    * The LAG calls are executed on every row so that the queues stay aligned;
    l1_non_research = lag(non_research_posts);
    l2_non_research = lag2(non_research_posts);
    l3_non_research = lag3(non_research_posts);
    l4_non_research = lag4(non_research_posts);
    l5_non_research = lag5(non_research_posts);

    * Blank out lags that would reach back before the first row of the ticker;
    if count <= 1 then l1_non_research = .;
    if count <= 2 then l2_non_research = .;
    if count <= 3 then l3_non_research = .;
    if count <= 4 then l4_non_research = .;
    if count <= 5 then l5_non_research = .;

    * The plus operator propagates missing values, so past_posts is missing
      (and high_wsb_posts is 0) until five prior rows are available;
    past_posts = l1_non_research + l2_non_research + l3_non_research
               + l4_non_research + l5_non_research;
    if past_posts > 1 then high_wsb_posts = 1;
    else high_wsb_posts = 0;

    * Previous-row abn_ret, also blanked on the first row of each ticker;
    lag_ret = lag(abn_ret);
    if count <= 1 then lag_ret = .;
    abs_lag_ret = abs(lag_ret);

    * Copy that is overwritten with its percentile group by PROC RANK later on;
    lag_abs_ret_rank = abs_lag_ret;
run;



* Rank lag_abs_ret_rank into 100 groups within each date.
  PROC RANK overwrites the variable with its group number (0 to 99);
proc sort data=info;
    by date;
run;

proc rank data=info groups=100 out=info;
    by date;
    var lag_abs_ret_rank;
run;

* high_abs_ret = 1 for group 90 and above (0 otherwise, including missing ranks).
  attention    = 1 when either high_abs_ret or high_wsb_posts is set;
data info;
    set info;
    high_abs_ret = (lag_abs_ret_rank >= 90);
    attention    = max(high_abs_ret, high_wsb_posts);
run;

* Ticker-days with at least one DD post, reduced to the flags merged onto the post data;
data info2;
    set info(where=(total_dd_posts > 0));
    keep ticker date
         high_abs_ret high_wsb_posts attention
         total_dd_posts gme_amc_flag;
run;






* Post-level dataset built from dd_posts18_21q2:
  pp          = 1 when pp_num_words exceeds funda_num_words
  pp2         = 1 when pp_num_words is positive
  cum_month   = months counted from January 2018 (January 2018 = 1)
  pre_gme and post_gme split the sample around day 13 of cum_month 37
  quarter     = 0 through cum_month 24 and then one value per three-month bucket
  quarter_alt = quarter with bucket 2 folded into bucket 1;
data pp_fund;
    set mylib.dd_posts18_21q2;

    * Rows without a pp word count are dropped;
    if pp_num_words = . then delete;

    pp  = (pp_num_words > funda_num_words);
    pp2 = (pp_num_words > 0);

    year      = year(day0);
    month     = month(day0);
    day       = day(day0);
    cum_month = 12 * (year - 2018) + month;

    * Split around day 13 of cum_month 37. The split day itself is removed;
    if cum_month < 37 then pre_gme = 1;
    else if cum_month > 37 then post_gme = 1;
    else if day = 13 then delete;
    else if day < 13 then pre_gme = 1;
    else post_gme = 1;

    * Only post_gme is zero-filled. pre_gme stays missing when not set;
    if post_gme = . then post_gme = 0;

    * The conditions are mutually exclusive. Rows matching none keep quarter as is;
    select;
        when (cum_month <= 24)                         quarter = 0;
        when (25 <= cum_month <= 27)                   quarter = 1;
        when (28 <= cum_month <= 30)                   quarter = 2;
        when (31 <= cum_month <= 33)                   quarter = 3;
        when (34 <= cum_month <= 36)                   quarter = 4;
        when (cum_month = 37 and post_gme = 0)         quarter = 4;
        when (37 <= cum_month <= 39 and post_gme = 1)  quarter = 5;
        when (40 <= cum_month <= 42 and post_gme = 1)  quarter = 6;
        otherwise;
    end;

    quarter_alt = ifn(quarter = 2, 1, quarter);

    * Keep cum_month 7 through 42 only;
    if 7 <= cum_month <= 42;
run;

* Attach the ticker-day flags from info2 to each post (match on ticker and post date).
  Posts without a matching row in info2 are dropped;
proc sql;
    create table pp_fund2 as
    select *
    from pp_fund as p
         inner join info2 as i
         on  p.ticker = i.ticker
         and p.day0   = i.date;
quit;


* Figure 4 - means of pp and attention within each quarter_alt bucket;
* As written, the WHERE statement restricts the sample to gme_amc_flag = 0 (Panel B).
  Comment out the WHERE statement to run on the unfiltered sample;
proc sort data=pp_fund2;
    by quarter_alt;
run;

proc means data=pp_fund2;
    where gme_amc_flag = 0;
    by quarter_alt;
    var pp attention;
run;



*The code below generates Specification 1 of Panel A;
*Specification 2 of Panel A imposes the filter: "where gme_amc_flag = 0";
*Specification 3 of Panel A replaces PP with PP2;
*Specification 4 of Panel A imposes the filter: "where gme_amc_flag = 0" and replaces PP with PP2;

*Specification 1 of Panel B replaces PP with attention;
*Specification 2 of Panel B replaces PP with attention and imposes the filter: "where gme_amc_flag = 0";

*Specification 3 of Panel B replaces PP with high_abs_ret;
*Specification 4 of Panel B replaces PP with high_wsb_posts;



/* Inputs for the clustered regressions that follow:
   yourdata = analysis dataset
   firmid   = first clustering variable
   time     = second clustering variable
   y        = dependent variable
   x        = regressor list */
%let yourdata = pp_fund2;
%let firmid   = ticker;
%let time     = date;
%let y        = pp;
%let x        = post_gme;




/* Three PROC SURVEYREG passes over the same model. Each pass saves its
   coefficient covariance matrix (covb) so that the three can be combined later. */

/* Pass 1: cluster on the firm identifier -> dataset FIRM */
proc surveyreg data=&yourdata;
    model &y = &x / covb;
    cluster &firmid;
    ods output covb=firm;
run;

/* Pass 2: cluster on the time variable -> dataset YEAR */
proc surveyreg data=&yourdata;
    model &y = &x / covb;
    cluster &time;
    ods output covb=year;
run;

/* Pass 3: cluster on the firm-by-time intersection -> dataset BOTH.
   The parameter estimates are also saved (dataset PARM). */
proc surveyreg data=&yourdata;
    model &y = &x / covb;
    cluster &firmid &time;
    ods output covb=both parameterestimates=parm;
run;

* Keep only the parameter names and point estimates;
data parm;
    set parm;
    keep parameter estimate;
run;

* parm1 holds one row with n = number of rows in parm, that is the dimension
  of the covariance matrices. It is read later by PROC IML;
data parm1;
    set parm end=is_last;
    n = _n_;
    if is_last;
    keep n;
run;

* Combine the three covariance matrices and extract the standard errors.
  Z = covariance from the intersection clustering (both)
  X = covariance from clustering on the first dimension (firm)
  Y = covariance from clustering on the second dimension (year)
  B = X + Y - Z is the combined covariance matrix
  G = square roots of the diagonal of B, written to dataset b as stderr;
proc iml;
    use both;
    read all var _num_ into Z;
    use firm;
    read all var _num_ into X;
    use year;
    read all var _num_ into Y;
    use parm1;
    read all var _num_ into n;

    print Z;
    print X;
    print Y;
    print n;

    B = X + Y - Z;
    G = vecdiag(B) ## .5;

    print B;
    print G;

    create b from G [colname='stderr'];
    append from G;
quit;

* Creates a dataset called results that contains the parameter estimates,
  the standard errors, and the t-stats.
  parm and b are combined row by row (no BY statement), so row i of b must
  hold the standard error of parameter i in parm.
  Note: this comment deliberately contains no quotation marks or apostrophes,
  because an unmatched one inside this comment form can swallow the code that follows;
data results;
    merge parm b;
    tstat = estimate / stderr;
run;

proc print data=results;
run;




* Stack estimates and t-stats into one column (estimate3) for table layout.
  obs = row number for the estimate and row number + .5 for its t-stat,
  so that sorting by obs places each t-stat directly under its estimate;
data spec3 spec3b;
    set results;
    keep parameter estimate3 obs;

    estimate3 = estimate;
    obs = _n_;
    output spec3;

    estimate3 = tstat;
    obs = _n_ + .5;
    output spec3b;
run;

proc append base=spec3 data=spec3b;
run;

proc sort data=spec3;
    by obs;
run;

